001    /*
002     * CondorDispatcher.java
003     *
004     * Created on July 17, 2003, 11:17 AM
005     *
006     * This file is part of the STAR Scheduler.
007     * Copyright (c) 2002-2003 STAR Collaboration - Brookhaven National Laboratory
008     *
009     * STAR Scheduler is free software; you can redistribute it and/or modify
010     * it under the terms of the GNU General Public License as published by
011     * the Free Software Foundation; either version 2 of the License, or
012     * (at your option) any later version.
013     *
014     * STAR Scheduler is distributed in the hope that it will be useful,
015     * but WITHOUT ANY WARRANTY; without even the implied warranty of
016     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017     * GNU General Public License for more details.
018     *
019     * You should have received a copy of the GNU General Public License
020     * along with STAR Scheduler; if not, write to the Free Software
021     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
022     */
023    package gov.bnl.star.offline.scheduler.condorg;
024    
025    import gov.bnl.star.offline.scheduler.*;
026    import gov.bnl.star.offline.scheduler.ComponentLibrary;
027    import gov.bnl.star.offline.scheduler.Dispatchers.lsf.CSHApplication;
028    import gov.bnl.star.offline.scheduler.Dispatchers.lsf.LSFDispatcher;
029    import gov.bnl.star.offline.scheduler.util.CSHCommandLineTask;
030    import gov.bnl.star.offline.scheduler.util.FilesystemToolkit;
031    //import gov.bnl.star.offline.scheduler.util.StatisticsRecorder; //Moved Statistics recording to Scheduler.java LH
032    
033    import java.io.File;
034    import java.io.FileOutputStream;
035    import java.io.PrintStream;
036    import java.util.*;
037    
038    import java.util.logging.Level;
039    import java.util.logging.Logger;
040    
041    
042    /** Dispatches jobs using Condor-G on a remote site that uses LSF. It will use some
043     * extra rsl attributes created to command some extra features such as mail
044     * notification, resource usage, job name and target machine. These extra LSF
045     * attribute require a patch to the LSF job manager.
046     * @author Gabriele Carcassi
047     * @version 1.0 2003/07/23
048     */
049    public class CondorDispatcher extends LSFDispatcher {
050        static private Logger log = Logger.getLogger(CondorGLSFDispatcher.class.getName());
051        private String condorEx;
052    
053        private String condorOptions;
054        
055        public void setCondorEx(String condorEx) {
056            this.condorEx = condorEx;
057        }
058        
059        public String getCondorEx() {
060            return condorEx;
061        }
062    
063        /** Creates a new dispatcher */
064        public CondorDispatcher() {
065        }
066    
067        /** Creates the scripts and dispatches the job on the target machine.
068         * @param request the job request
069         */
070        public void dispatch(Request request, List jobs) {
071            log.info("Dispatching using Condor: \"" + request.getCommand() +
072                "\"");
073    
074            // Enables the simulation mode if necessary
075            useSimulationMode(request.getSimulation());
076            reportedFailure = false;
077    
078            // Submits from the higher to the lower JobID. This way the
079            // user has a feel of  when the last job is going to be
080            // submitted
081            for (int nProcess = jobs.size() - 1; nProcess >= 0;
082                    nProcess--) {
083                Job job = (Job) jobs.get(nProcess);
084    
085                System.out.print("Dispatching process " +
086                    job.getJobID() + ".");
087                dispatch(request, job);
088            }
089    
090            //StatisticsRecorder.getIntance().recordStatistics(request, jobs);
091        }
092    
093        protected void dispatch(Request request, Job job) {
094            application = (CSHApplication) ComponentLibrary.getInstance().getComponent("CSHApplication");
095    
096            // TODO: all the parameters should be passed in one go
097            application.setJob(request, job);
098            application.setScratchDir(scratchDir);
099            application.setSubmissionCommand(getCondorCommand(request, job));
100    
101            application.prepareJob();
102            prepareClassAd(request, job);
103    
104            log.info("Executing \"" + getCondorCommand(request, job) + "\"");
105    
106            if (!simulation) {
107                try {
108                    Thread.sleep(getMsBtwnSuccess());
109                } catch (Exception e) {
110                }
111    
112                int attempt = 0;
113                boolean success = false;
114    
115                while (!success && (attempt < getMaxAttempts())) {
116                    try {
117                        CSHCommandLineTask task = new CSHCommandLineTask(getCondorCommand(
118                                    request, job), true, 30000);
119                        task.execute();
120    
121                        if (task.getExitStatus() != 0) {
122                            log.warning("bsub failed: " + task.getOutput());
123                            Thread.sleep(getMsBtwnFailure());
124                            System.out.print("/");
125                            attempt++;
126                        } else {
127                            success = true;
128                        }
129                    } catch (Exception e) {
130                        log.log(Level.SEVERE,
131                            "Couldn't submit the script to Condor-g", e);
132    
133                        try {
134                            Thread.sleep(getMsBtwnFailure());
135                        } catch (Exception e1) {
136                        }
137    
138                        System.out.print("/");
139                        attempt++;
140                    }
141                }
142    
143                if (success) {
144                    System.out.println(" done.");
145                } else {
146                    System.out.println(" FAILED!!");
147                }
148            } else {
149                System.out.println(" simulated.");
150            }
151        }
152    
153        /** Returns the command line to submit the job through condor-g.
154         * @param request the request that originated the job
155         * @param job the job to be dispatched
156         * @return the commandline to submit the job
157         */
158        protected String getCondorCommand(Request request, Job job) {
159            return condorEx + " " + getClassAdName(request, job);
160        }
161    
162        /** Returns the name of the file containing the class ad. Class ad is the job
163         * description required by condor to submit a job.
164         * @param request the request that originated the job
165         * @param job the job to be submitted
166         * @return the file name of the class ad
167         */
168        protected String getClassAdName(Request request, Job job) {
169            return "sched" + job.getJobID() + ".condor";
170        }
171        private void prepareClassAd(Request request, Job job) {
172            try {
173                PrintStream classAd = new PrintStream(new FileOutputStream(
174                            new File(getClassAdName(request, job))));
175                createClassAd(request, job, classAd);
176            } catch (Exception e) {
177                log.log(Level.SEVERE, "Couldn't create the class ad", e);
178                throw new RuntimeException("Couldn't create the class ad " +
179                    getClassAdName(request, job) + ": " + e.getMessage());
180            }
181        }
182    
183        private void createClassAd(Request request, Job job,
184            PrintStream classAd) {
185            classAd.println("Universe       = vanilla");
186            classAd.println();
187            //classAd.println("+Experiment = \"star\"");
188            classAd.println("Notification   = never");
189            classAd.print("Executable     = ");
190            classAd.println(getExecutable());
191    
192            if (getArguments() != null) {
193                classAd.print("Arguments      = ");
194                classAd.println(getArguments());
195            }
196    
197            if (application.getStdin() != null) {
198                classAd.print("Input          = ");
199                classAd.println(application.getStdin());
200            }
201    
202            if (application.getStdout() != null) {
203                classAd.print("Output =        ");
204                classAd.println(application.getStdout());
205            }
206    
207            if (application.getStderr() != null) {
208                classAd.print("Error          = ");
209                classAd.println(application.getStderr());
210            }
211    
212            classAd.print("Log            = ");
213            classAd.println(getLogName(job));
214            classAd.println("Getenv         = true");
215    
216            if (getRemoteDirectory() != null) {
217                classAd.print("Initialdir     = ");
218                classAd.println(getRemoteDirectory());
219            }
220    
221            if (getCondorOptions() != null) {
222                classAd.println(getCondorOptions());
223            }
224            
225    //        classAd.println("transfer_executable = false");
226            classAd.println("Queue");
227        }
228    
229        private String getExecutable() {
230            if (application.getCommandLine().indexOf(' ') == -1) {
231                return application.getCommandLine();
232            }
233    
234            return application.getCommandLine().substring(0,
235                application.getCommandLine().indexOf(' '));
236        }
237    
238        private String getArguments() {
239            if (application.getCommandLine().indexOf(' ') == -1) {
240                return null;
241            }
242    
243            return application.getCommandLine().substring(application.getCommandLine()
244                                                                     .indexOf(' ') +
245                1);
246        }
247    
248        private String getLogName(Job job) {
249            // TODO maybe log filename should be put as a general property of Process (as stds)
250            return "sched" + job.getJobID() + ".condor.log";
251        }
252    
253    /*    private String getGlobusScheduler() {
254            //TODO make it flexible
255            return "stargrid01.rcf.bnl.gov/jobmanager-lsf";
256        }*/
257    
258        private String getRemoteDirectory() {
259            // TODO this has to be specified better: remote execution directory could be different from scheduler execution directory
260            return FilesystemToolkit.getCurrentDirectory();
261        }
262    
263        /** Getter for property condorOptions.
264         * @return Value of property condorOptions.
265         *
266         */
267        public String getCondorOptions() {
268            return this.condorOptions;
269        }
270        
271        /** Setter for property condorOptions.
272         * @param condorOptions New value of property condorOptions.
273         *
274         */
275        public void setCondorOptions(String condorOptions) {
276            this.condorOptions = condorOptions;
277        }
278        
279    /*    protected String getResourceUsageSwitch(Process job) {
280            String res = super.getResourceUsageSwitch(job);
281    
282            return res.replaceAll("\"", "\\\\\"");
283        }*/
284    }